# -*- coding: utf-8 -*-
import logging
import urllib
import urllib2

from django.shortcuts import render_to_response
from pyquery import PyQuery as pq

from dict.models import Word

# Views
def index(req):
    """Render the word lookup page.

    For anything other than a POST the bare ``index.html`` form is rendered.
    For a POST, the submitted ``word`` field is looked up in the ``Word``
    table first; when it is not stored there, ``get_word_info`` is asked for
    it and a successful result is saved for later requests.

    The database acts as an optional cache here: failures while reading or
    writing it are logged and the request still completes.

    Template context:
      * word found     -> ``word_name``, ``audio_link``, ``duden_link``,
                          ``betonung``, ``worttrennung``
      * word not found -> ``word_name`` only (the text that was submitted)
    """
    if req.method != 'POST':
        return render_to_response('index.html')

    # .get() instead of [] so a POST without the field does not raise.
    word_name = req.POST.get('word', '')
    if not word_name:
        # Nothing to search for; skip the database and the remote lookup.
        return render_to_response('index.html', {'word_name': word_name})

    log = logging.getLogger(__name__)
    word = None

    try:
        # Check if the word already exists in database
        word = Word.objects.get(name=word_name)
    except Word.DoesNotExist:
        # Expected for a word that has not been looked up before.
        pass
    except Exception:
        # Any other database problem: fall back to the remote lookup, but
        # leave a trace instead of hiding it.
        log.exception('Database lookup failed for word %r', word_name)

    if word is None:
        word = get_word_info(word_name.encode('utf8'))
        if word is not None:
            try:
                # Try to save word info into database
                Word.objects.create(
                    name=word.name,
                    duden_link=word.duden_link,
                    audio_link=word.audio_link,
                    worttrennung=word.worttrennung,
                    betonung=word.betonung,
                )
            except Exception:
                # Saving is best-effort; the result is still shown.
                log.exception('Could not save word %r', word.name)

    if word is None:
        return render_to_response('index.html', {'word_name': word_name})

    return render_to_response('index.html', {
        'word_name': word.name,
        'audio_link': word.audio_link,
        'duden_link': word.duden_link,
        'betonung': word.betonung,
        'worttrennung': word.worttrennung,
    })
	
# Helpers
def _fetch_page(url, timeout):
    """Return the response body of *url*, closing the connection afterwards."""
    response = urllib2.urlopen(url, timeout=timeout)
    try:
        return response.read()
    finally:
        response.close()


def get_word_info(name, attempts=3, timeout=10):
    """Look up *name* on duden.de and return an unsaved ``Word``.

    The search page is fetched first. If its first ``div.lemma-hit`` has a
    ``<strong>`` inside its ``h3 a`` title link, that link is followed and
    the word page is scraped for ``worttrennung``, ``betonung`` and
    ``audio_link``.

    Parameters:
      name     -- search term; the view passes a UTF-8 encoded byte string.
      attempts -- how many times the whole lookup is tried before giving up.
      timeout  -- per-request timeout in seconds.

    Returns a ``Word`` instance (not saved), or ``None`` when the search has
    no hit, the first hit has no ``<strong>`` title or link target, or every
    attempt failed.
    """
    base_search_url = 'http://www.duden.de/suchen/dudenonline/'
    base_url = 'http://www.duden.de'
    log = logging.getLogger(__name__)

    # It may fail to get word info because of network issue
    for attempt in range(1, attempts + 1):
        try:
            # Percent-encode the term so spaces, '/', '?' or non-ASCII bytes
            # cannot corrupt the request path.
            search_url = base_search_url + urllib.quote(name, safe='')
            document = pq(_fetch_page(search_url, timeout))

            # Find first hit in result page
            hits = document.find('div.lemma-hit')
            if not hits:
                return None
            first_hit = pq(hits[0])

            # Only follow the first hit when its title is in bold
            title = first_hit.find('h3 a')
            if not title.find('strong'):
                return None
            word_link = title.attr('href')
            if word_link is None:
                return None

            # Spelling of the word as shown by the site. Kept separate from
            # ``name`` so a retry still searches for the original term.
            canonical_name = title.text()

            # Get the html of word page
            word_url = base_url + word_link
            document = pq(_fetch_page(word_url, timeout))

            word = Word()
            word.name = canonical_name
            word.duden_link = word_url

            # Get worttrennung of the word: the text of the siblings of the
            # first element matching the 'Worttrennung:' label.
            # NOTE(review): ':contains' may also match ancestors of the label
            # element, making [0] the outermost match -- confirm on a live page.
            label = document.find(":contains('Worttrennung:')")
            if label:
                label_siblings = pq(label[0]).siblings()
                if label_siblings:
                    word.worttrennung = label_siblings.text()

            # Get betonung of the word
            betonung_object = document.find('span.font_ph')
            if betonung_object:
                word.betonung = betonung_object.text()

            # Get audio link of the word (second link inside the audio span)
            audio_object = document.find('span.audio')
            if audio_object:
                word.audio_link = audio_object.find('p a').eq(1).attr('href')

            return word
        except Exception:
            # Best-effort lookup: record the failure and try again.
            log.warning('Attempt %d of %d to look up %r on duden.de failed',
                        attempt, attempts, name, exc_info=True)

    return None
